Importing The Required Libraries & Packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import seaborn as sns
import plotly.graph_objs as go
import plotly.offline as py
import matplotlib.ticker as mtick
plt.style.use('fivethirtyeight')
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')
Changing The Default Working Directory & Path
os.getcwd()
'C:\\Users\\chand'
os.chdir('D:\\bala\\study things\\data science\\Batch 74 Day 37\\Batch 74 Day 37\\Project 11 Flask Project and Deployment\\')
Reading The Dataset Using Pandas Command
data = pd.read_csv('zomato.csv')
Checking the Null values of all the Columns in the dataset
data.isnull().sum()
url 0 address 0 name 0 online_order 0 book_table 0 rate 7775 votes 0 phone 1208 location 21 rest_type 227 dish_liked 28078 cuisines 45 approx_cost(for two people) 346 reviews_list 0 menu_item 0 listed_in(type) 0 listed_in(city) 0 dtype: int64
Dropping the unwanted column in the dataset
df = data.drop(['url','phone'],axis=1)
Checking the dataset whether its is having a duplicate values or not
print(df.duplicated().sum())
43
Dropping all the duplicate value from the dataset and checking the duplicate value after dropping it.
df.drop_duplicates(inplace=True)
display(df.duplicated().sum())
0
Dropping all the null value from the dataset and Checking the Null values of all the Columns after dropping it.
df.dropna(how='any',inplace=True)
display(df.isnull().sum())
address 0 name 0 online_order 0 book_table 0 rate 0 votes 0 location 0 rest_type 0 dish_liked 0 cuisines 0 approx_cost(for two people) 0 reviews_list 0 menu_item 0 listed_in(type) 0 listed_in(city) 0 dtype: int64
Getting all the column from the dataset
display(df.columns)
Index(['address', 'name', 'online_order', 'book_table', 'rate', 'votes',
'location', 'rest_type', 'dish_liked', 'cuisines',
'approx_cost(for two people)', 'reviews_list', 'menu_item',
'listed_in(type)', 'listed_in(city)'],
dtype='object')
Renaming the columns of the dataset for making it simple
df = df.rename(columns={'approx_cost(for two people)':'cost','listed_in(type)':'type','listed_in(city)':'city'})
Checking the column name from the dataset after renaming it
df.columns
Index(['address', 'name', 'online_order', 'book_table', 'rate', 'votes',
'location', 'rest_type', 'dish_liked', 'cuisines', 'cost',
'reviews_list', 'menu_item', 'type', 'city'],
dtype='object')
Getting all the unique value of the cost column from the dataset
display(df['cost'].unique())
array(['800', '300', '600', '700', '550', '500', '450', '650', '400',
'750', '200', '850', '1,200', '150', '350', '250', '1,500',
'1,300', '1,000', '100', '900', '1,100', '1,600', '950', '230',
'1,700', '1,400', '1,350', '2,200', '2,000', '1,800', '1,900',
'180', '330', '2,500', '2,100', '3,000', '2,800', '3,400', '40',
'1,250', '3,500', '4,000', '2,400', '1,450', '3,200', '6,000',
'1,050', '4,100', '2,300', '120', '2,600', '5,000', '3,700',
'1,650', '2,700', '4,500'], dtype=object)
Updating the cost column with the appropriate form of numerics
df['cost']=df['cost'].apply(lambda x:x.replace(',',''))
Getting all the unique value of the cost column from the dataset after changing it to numeric form
display(df['cost'].unique())
array(['800', '300', '600', '700', '550', '500', '450', '650', '400',
'750', '200', '850', '1200', '150', '350', '250', '1500', '1300',
'1000', '100', '900', '1100', '1600', '950', '230', '1700', '1400',
'1350', '2200', '2000', '1800', '1900', '180', '330', '2500',
'2100', '3000', '2800', '3400', '40', '1250', '3500', '4000',
'2400', '1450', '3200', '6000', '1050', '4100', '2300', '120',
'2600', '5000', '3700', '1650', '2700', '4500'], dtype=object)
Changing the data type of the cost column from "Object" to "Float"
df['cost']=df['cost'].astype('float')
Checking the value counts for cost columns from the dataset
df.cost.value_counts()
400.0 2485 500.0 2323 600.0 2175 300.0 1694 800.0 1664 1000.0 1242 700.0 1207 1200.0 908 200.0 873 1500.0 866 450.0 721 250.0 701 750.0 651 650.0 645 350.0 553 900.0 528 1300.0 496 1100.0 466 1400.0 453 550.0 440 150.0 371 2000.0 298 1600.0 241 1700.0 233 1800.0 193 3000.0 159 2500.0 143 850.0 122 100.0 120 2100.0 63 1900.0 58 950.0 47 2200.0 39 2800.0 31 3500.0 25 4000.0 24 2400.0 22 1350.0 18 180.0 17 3400.0 13 230.0 10 1250.0 9 40.0 8 1650.0 6 1450.0 5 2600.0 4 1050.0 4 4100.0 4 330.0 4 2700.0 2 3200.0 2 120.0 2 6000.0 2 4500.0 2 5000.0 1 3700.0 1 2300.0 1 Name: cost, dtype: int64
Updating the rate column without having "NEW" in it for the future processing
df = df.loc[df.rate !='NEW']
Getting all the unique value of the rate column from the dataset
display(df['rate'].unique())
array(['4.1/5', '3.8/5', '3.7/5', '4.6/5', '4.0/5', '4.2/5', '3.9/5',
'3.0/5', '3.6/5', '2.8/5', '4.4/5', '3.1/5', '4.3/5', '2.6/5',
'3.3/5', '3.5/5', '3.8 /5', '3.2/5', '4.5/5', '2.5/5', '2.9/5',
'3.4/5', '2.7/5', '4.7/5', '2.4/5', '2.2/5', '2.3/5', '4.8/5',
'3.9 /5', '4.2 /5', '4.0 /5', '4.1 /5', '2.9 /5', '2.7 /5',
'2.5 /5', '2.6 /5', '4.5 /5', '4.3 /5', '3.7 /5', '4.4 /5',
'4.9/5', '2.1/5', '2.0/5', '1.8/5', '3.4 /5', '3.6 /5', '3.3 /5',
'4.6 /5', '4.9 /5', '3.2 /5', '3.0 /5', '2.8 /5', '3.5 /5',
'3.1 /5', '4.8 /5', '2.3 /5', '4.7 /5', '2.4 /5', '2.1 /5',
'2.2 /5', '2.0 /5', '1.8 /5'], dtype=object)
Updating the rate column as it can be tansformed into proper form of numerics
df['rate']=df['rate'].apply(lambda x:x.replace('/5',''))
Getting all the unique value of the rate column from the dataset after updating it.
display(df['rate'].unique())
array(['4.1', '3.8', '3.7', '4.6', '4.0', '4.2', '3.9', '3.0', '3.6',
'2.8', '4.4', '3.1', '4.3', '2.6', '3.3', '3.5', '3.8 ', '3.2',
'4.5', '2.5', '2.9', '3.4', '2.7', '4.7', '2.4', '2.2', '2.3',
'4.8', '3.9 ', '4.2 ', '4.0 ', '4.1 ', '2.9 ', '2.7 ', '2.5 ',
'2.6 ', '4.5 ', '4.3 ', '3.7 ', '4.4 ', '4.9', '2.1', '2.0', '1.8',
'3.4 ', '3.6 ', '3.3 ', '4.6 ', '4.9 ', '3.2 ', '3.0 ', '2.8 ',
'3.5 ', '3.1 ', '4.8 ', '2.3 ', '4.7 ', '2.4 ', '2.1 ', '2.2 ',
'2.0 ', '1.8 '], dtype=object)
Assigning the Top 20 restaurants to the new variable for the simple visualization
chains = df['name'].value_counts()[:20]
Plotting the bar graph for the most famous restaurants(Top 20 restaurants) and saving the PNG file
plt.figure(figsize=(10,10))
chains=df['name'].value_counts()[:20]
sns.barplot(x=chains,y=chains.index,palette='deep')
plt.title('Most famous restaurants chains in Bengaluru')
plt.xlabel('Number of outlets')
plt.savefig('Most Famous Restaurants in Bengaluru.png')
plt.show()
Plotting the Pie-Chart with table booking value.
x=df['book_table'].value_counts()
colors = ['#800080', '#0000A0']
trace=go.Pie(labels=x.index,values=x,textinfo="value",
marker=dict(colors=colors,
line=dict(color='#001000', width=2)))
layout=go.Layout(title="Table booking",width=600,height=600)
fig=go.Figure(data=[trace],layout=layout)
py.iplot(fig, filename='pie_chart_subplots')
plt.show()
Plotting the bar Plot with the online delivery using seaborn count plot package and saving the PNG file
sns.countplot(x='online_order',data = df)
fig = plt.gcf()
fig.set_size_inches(10,10)
plt.title('Whether Restaurants deliver online or not')
plt.savefig(' Online delivery of restaurants.png')
plt.show()
Visualizing the data distribution of a rate column against the density distribution using Seaborn Distplot and saving the PNG file
sns.distplot(df['rate'],bins=20)
plt.title('Data distribution of rate column')
plt.savefig('Data distribution of rate column.png')
plt.show()
Getting the Minumim and Maximum Rating of the Restaurants
display(df['rate'].min())
display(df['rate'].max())
'1.8'
'4.9 '
Getting all the unique value of the rate column from the dataset
df.rate.unique()
array(['4.1', '3.8', '3.7', '4.6', '4.0', '4.2', '3.9', '3.0', '3.6',
'2.8', '4.4', '3.1', '4.3', '2.6', '3.3', '3.5', '3.8 ', '3.2',
'4.5', '2.5', '2.9', '3.4', '2.7', '4.7', '2.4', '2.2', '2.3',
'4.8', '3.9 ', '4.2 ', '4.0 ', '4.1 ', '2.9 ', '2.7 ', '2.5 ',
'2.6 ', '4.5 ', '4.3 ', '3.7 ', '4.4 ', '4.9', '2.1', '2.0', '1.8',
'3.4 ', '3.6 ', '3.3 ', '4.6 ', '4.9 ', '3.2 ', '3.0 ', '2.8 ',
'3.5 ', '3.1 ', '4.8 ', '2.3 ', '4.7 ', '2.4 ', '2.1 ', '2.2 ',
'2.0 ', '1.8 '], dtype=object)
Changing the data type of the cost column from "Object" to "Float"
df['rate']=df['rate'].astype('float')
Getting all the unique value of the rate column from the dataset after updating the data types
df.rate.unique()
array([4.1, 3.8, 3.7, 4.6, 4. , 4.2, 3.9, 3. , 3.6, 2.8, 4.4, 3.1, 4.3,
2.6, 3.3, 3.5, 3.2, 4.5, 2.5, 2.9, 3.4, 2.7, 4.7, 2.4, 2.2, 2.3,
4.8, 4.9, 2.1, 2. , 1.8])
plotting the Bar Graph using matplotlib package with the rate column grouping it by the unit difference and saving the PNG file
group = [1,2,3,4,5]
plt.hist(df['rate'],group,histtype='bar',rwidth=0.5,color='b')
plt.title('Rating Unit Difference size')
plt.savefig('Rating unit Difference Size.png')
plt.show()
plotting the Bar Graph using matplotlib package with the rate column grouping it by the unit difference and saving the PNG file
group = [1.5,2,2.5,3,3.5,4,4.5,5]
plt.hist(df['rate'],group,histtype='bar',rwidth=0.5,color='b')
plt.title('Rating 0.5 Unit Difference size')
plt.savefig('Rating 0.5 unit Difference Size.png')
plt.show()
Assigning the different variables for the rate column for using Pie Chart to visualize the percentage of restaurants according to ratings.
gr_1to2=((df['rate']>=1) & (df['rate']<2)).sum()
gr_2to3=((df['rate']>=2) & (df['rate']<3)).sum()
gr_3to4=((df['rate']>=3) & (df['rate']<4)).sum()
gr_4to5=(df['rate']>=4).sum()
Plotting the pie chart with assigned value to visualize the percentage of restaurants according to ratings and saving the PNG file.
slices=[gr_1to2,gr_2to3,gr_3to4,gr_4to5]
labels=['Rating 1 to 2','Rating 2 to 3','Rating 3 to 4','Rating >4']
colors = ['#ff3333','#c2c2d6','#6699ff']
plt.pie(slices,colors=colors, labels=labels, autopct='%1.0f%%', pctdistance=.5, labeldistance=1.2,shadow=True)
fig = plt.gcf()
plt.title("Percentage of Restaurants according to their ratings")
plt.savefig('Percentage of Restaurants according to their ratings.png')
fig.set_size_inches(10,10)
plt.show()
Plotting the bar Plot with the service type using seaborn count plot package and saving the PNG file
sns.countplot(x= 'type', data = df).set_xticklabels(sns.countplot(x= 'type' , data = df).get_xticklabels(), rotation=90, ha="right")
fig = plt.gcf()
fig.set_size_inches(20,12)
plt.title('Type of Service')
plt.savefig('type of Service.png')
plt.show()
Grouping by the cost column from the dataset by their sizes
print (df.groupby('cost').size())
cost 40.0 8 100.0 120 120.0 2 150.0 371 180.0 17 200.0 869 230.0 10 250.0 697 300.0 1688 330.0 4 350.0 553 400.0 2478 450.0 719 500.0 2321 550.0 440 600.0 2168 650.0 644 700.0 1202 750.0 651 800.0 1647 850.0 122 900.0 524 950.0 47 1000.0 1218 1050.0 4 1100.0 450 1200.0 903 1250.0 8 1300.0 496 1350.0 18 1400.0 447 1450.0 5 1500.0 841 1600.0 237 1650.0 6 1700.0 229 1800.0 193 1900.0 58 2000.0 298 2100.0 60 2200.0 39 2300.0 1 2400.0 22 2500.0 143 2600.0 4 2700.0 2 2800.0 31 3000.0 159 3200.0 2 3400.0 13 3500.0 25 3700.0 1 4000.0 24 4100.0 4 4500.0 2 5000.0 1 6000.0 2 dtype: int64
Plotting the box plot with cost value
from plotly.offline import iplot
trace0=go.Box(y=df['cost'],name="accepting online orders",
marker = dict(
color = 'rgb(113, 10, 100)',
))
data=[trace0]
layout=go.Layout(title="Box plot of approximate cost",width=800,height=800,yaxis=dict(title="Price"))
fig=go.Figure(data=data,layout=layout)
py.iplot(fig)
Visualizing the data distribution of a cost column against the density distribution using Seaborn Distplot and saving the PNG file
plt.figure(figsize=(8,8))
sns.distplot(df['cost'])
plt.title('Data distribution of cost column')
plt.savefig('Data distribution of cost column.png')
plt.show()
By using Regular Expression Package,splitting occurs in disliked columns as multiple values seperated by comma,Extract each dishes and creating a list by appending the each dishes
import re
df.index=range(df.shape[0])
likes=[]
for i in range(df.shape[0]):
array_split=re.split(',',df['dish_liked'][i])
for item in array_split:
likes.append(item)
Displaying the each number of dishes and its counts
display ('Number of dishes', len(likes))
display(likes)
'Number of dishes'
126848
['Pasta', ' Lunch Buffet', ' Masala Papad', ' Paneer Lajawab', ' Tomato Shorba', ' Dum Biryani', ' Sweet Corn Soup', 'Momos', ' Lunch Buffet', ' Chocolate Nirvana', ' Thai Green Curry', ' Paneer Tikka', ' Dum Biryani', ' Chicken Biryani', 'Churros', ' Cannelloni', ' Minestrone Soup', ' Hot Chocolate', ' Pink Sauce Pasta', ' Salsa', ' Veg Supreme Pizza', 'Masala Dosa', 'Panipuri', ' Gol Gappe', 'Onion Rings', ' Pasta', ' Kadhai Paneer', ' Salads', ' Salad', ' Roti', ' Jeera Rice', 'Farmhouse Pizza', ' Chocolate Banana', ' Virgin Mojito', ' Pasta', ' Paneer Tikka', ' Lime Soda', ' Prawn Pizza', 'Pizza', ' Mocktails', ' Coffee', ' Nachos', ' Salad', ' Pasta', ' Sandwiches', 'Waffles', ' Pasta', ' Coleslaw Sandwich', ' Choco Waffle', ' Tacos', ' Momos', ' Cheese Nachos', 'Waffles', ' Pasta', ' Crispy Chicken', ' Honey Chilli Chicken', ' Sandwich', ' Coffee', ' Crepe', 'Mocktails', ' Peri Fries', ' Lasagne', ' Pizza', ' Chicken Bbq Wings', ' Virgin Mojito', ' Nachos', 'Coffee', ' Spaghetti', ' Pancakes', ' Nachos', ' Pasta', ' Sandwich', ' Garlic Bread', 'Sandwich', ' Omelette', ' Ice Tea', ' Virgin Mojito', ' Hot Chocolate', ' Pasta Arrabiata', ' Hazelnut Cappuccino', 'Churros', ' Cannelloni', ' Minestrone Soup', ' Hot Chocolate', ' Pink Sauce Pasta', ' Salsa', ' Veg Supreme Pizza', 'Garlic Bread', ' Burgers', ' Sandwiches', ' Pizza', ' Hot Chocolate Fudge', ' Waffles', ' Potato Wedges', 'Momos', ' Mushroom Bruschettas', ' Jalapeno Cheeseballs', ' Pasta', ' Red Velvet Shake', ' Red Velvet Milkshake', ' Maggi', 'Pasta', ' Gelato', ' Garlic Bread', ' Mojito', ' Nachos', ' Paneer Pizza', ' Mocktails', 'Cheese Maggi', ' Peri Peri Fries', ' Pasta Arrabiata', 'Burgers', ' Caramel Milkshake', ' Iced Coffee', ' Iced Tea', 'Pizza', ' Garlic Bread', 'Pasta', ' Iced Tea', 'Paratha', ' Sandwich', ' Chilli Idli', ' Noodles', ' Sandwiches', ' Lime Soda', 'Pizza', ' Fries', ' Anjeer Ice Cream', ' Sandwiches', ' Gelato', ' Burgers', ' Kesar Badam', 'Burgers', ' Lassi', ' Chicken Grill', ' Naan', ' Momos', ' Chicken Burger', ' Biryani', 'Paneer Tikka', ' Garlic Bread', ' Thin Crust Pizza', ' Falafel', ' Veggie Pizza', 'Rolls', ' Veggie Wrap', ' Chocolate Fantasy', ' Rice Bowls', ' Chicken Rice', ' Paneer Makhanwala', ' Chicken Tikka Wrap', 'Raita', ' Gulab Jamun', ' Paneer Biryani', ' Vegetable Biryani', ' Gosht Dum Biryani', ' Chicken Biryani', ' Murgh Biryani', 'Noodles', ' Momos', ' Veg Manchow Soup', ' Kung Pao Chicken', ' Chicken Lollipop', ' Chop Suey', ' Dragon Chicken', 'Chicken Tikka Masala', ' Mutton Keema Dosa', ' Mutton Biryani', ' Raan Biryani', ' Brain Fry', ' Chicken Grill', ' Ghee Rice', 'Dabeli', ' Faluda', ' Masala Poori', ' Sandwiches', ' Tokri Chaat', ' Pav Bhaji', ' Dahipuri', 'Burgers', ' Fries', ' Hot Chocolate', 'Pane Pizza', ' Lava Cake', ' Pasta', ' Stuffed Garlic Bread', ' Chocolava Cake', ' Tacos', 'Farmhouse Pizza', ' Chocolate Banana', ' Virgin Mojito', ' Pasta', ' Paneer Tikka', ' Lime Soda', ' Prawn Pizza', 'Prompt Service', 'Biryani', 'Butter Chicken', ' Noodles', ' Thali', ' Biryani', 'Momos', ' Dragon Chicken', ' Hakka Noodles', ' Chowmein', ' Chop Suey', ' Chilli Garlic Noodles', ' Garlic Rice', 'Paratha', ' Pasta', 'Hot Chocolate Fudge', ' Vanilla Ice Cream', ' Fudge Cake', ' Chocolate Mousse', 'Prawn Biryani', ' Dragon Chicken', ' Chicken Boneless Biryani', ' Afghani Biryani', ' Mughlai Biryani', ' Paneer Tikka Biryani', ' Gongura Biryani', 'Pasta', ' Momos', ' Rolls', ' Chicken Roll', ' Chilli Chicken', ' Paneer Roll', ' Spaghetti', 'Salads', ' Sandwiches', ' Salad', ' Thai Rice', ' Pasta', ' Noodles', ' Pizza', 'Sushi', ' Noodles', ' Wine', ' Khau Suey', ' Mocktails', ' Yakisoba', ' Spring Roll', 'Pasta', ' Pepper Pizza', ' Paneer Pizza', ' Peppy Paneer', ' Nachos', ' Supreme Pizza', ' Manchow Soup', 'Sea Food', ' Crab Meat', ' Crab Soup', ' Fish Curry', ' Chicken Cafreal', ' Butter Naan', ' Butter Chicken', 'Salads', ' Panneer Butter Masala', ' Rabri', ' Gajar Ka Halwa', ' Manchow Soup', ' Aam Panna', ' Tomato Basil Soup', 'Grape Juice', ' Chicken Grill', ' Chicken Tikka Roll', ' Shawarma Roll', ' Chicken Shawarama', ' Biryani Combo', ' Rolls', 'Beer', ' Wedges', ' Pizza', ' Nachos', ' Salads', ' Papaya Salad', ' Buffalo Wings', 'Vegetable Biryani', ' Ginger Chicken', ' Chicken Biryani', ' Donne Biryani', ' Chicken Kebab', ' Egg Bhurji', ' Dum Biryani', 'Rolls', ' Momos', ' Fries', ' Veggie Roll', ' Chicken Kebab Roll', ' Paneer Roll', ' Cheese Roll', 'Benne Dosa', ' Mirchi Bajji', ' Filter Coffee', ' Poha', ' Tea', ' Butter Dosa', ' Crispy Dosa', 'Masala Dosa', 'Berryblast', ' Waffles', ' Cheesecake', ' Salted Caramel', 'Chicken Boneless Biryani', ' Hyderabadi Biryani', ' Paneer Biryani', ' Tandoori Chicken', ' Vegetable Biryani', ' Chicken Kebab', ' Raita', 'Biryani', ' Chicken Guntur', ' Thali', ' Buttermilk', ' Manchurian', 'Chilli Chicken', 'Panipuri', ' Chaat', ' Noodles', ' Dahi Bhalle', 'Hot Chocolate Fudge', 'Rooftop Ambience', 'Coffee', ' Tea', ' Kesari Bath', ' Open Dosa', ' Vada', ' Idli', ' Bread Dosa', 'Cup Cake', ' Chocolate Cake', 'Masala Dosa', ' Tirupathi Pongal', ' Filter Coffee', ' Vada', ' Kharabath', ' Kali Dosa', ' Rava Idli', 'Panner Tikka', ' Kulfi', ' Coffee', ' Pav Bhaji', ' Carrot Halwa', ' Fried Rice', ' Set Dosa', 'Paneer Tikka', ' Dal Kichadi', ' French Fries', 'Filter Coffee', ' Masala Dosa', ' Idli', ' Vada', 'Rolls', ' Shawarma', ' Butter Chicken', ' Mutton Biryani', ' Lahori Chicken', ' Chicken Roll', ' Grape Juice', 'Rolls', ' Paratha', ' Chicken Biryani', 'Paratha', ' Mughlai Chicken', ' Masala Lemonade', ' Butter Chicken', ' Rajma', ' Sandwich', ' Salad', 'Burgers', ' French Fries', 'Rolls', ' Ice Cream Roll', ' Paan Ice Cream', ' Coffee', ' Vanilla Ice Cream', ' Chocolate Icecreams', ' Mississippi Mudpie', 'Kadhai Paneer', ' Ghee Rice', ' Chicken Grill', ' Shawarma', ' Paratha', ' Biryani', ' Butter Chicken', 'Chicken Biryani', ' Rolls', 'Roast Pork', ' Tai Pai Chicken', ' Beer', ' Sweet Corn Chicken Soup', ' Masala Peanuts', ' Chinese Chop Suey', ' Chicken Lollipop', 'Pizza', ' Nachos', ' Garlic Maggi', ' Burgers', ' Cheese Maggi', ' Bruschetta Bites', ' Sandwich', 'Noodles', ' Laksa Soup', ' Coffee', ' Veg Fritters', ' Vegetarian', ' Babycorn Manchurian', ' Mango Salad', 'Lassi', 'Coffee', ' Chaat', 'Pizza', ' Nachos', ' Salads', ' Peri Peri Fries', ' Brownie', ' Pasta Alfredo', ' Spaghetti', 'Chicken Grill', ' Biryani', ' Roti', ' Chicken Masala', 'Chaat', ' Pav Bhaji', ' Dabeli', ' Vada Pav', ' Tawa Pulav', ' Panipuri', ' Malpua', 'Pizza', ' Burgers', 'Burgers', ' Panneer Butter Masala', ' Pasta', ' Hot Dog', ' Fries', ' Cheesy Pizza', ' Potato Wedges', 'Sandwiches', ' Thick Shakes', ' Paan Shake', ' Brownie Bomb', ' Fries', ' Spinach Corn Sandwich', ' Paneer Peri Peri', 'Pizza', ' Pasta', ' Spinach Salsa', ' Garlic Bread', ' Lasagne', ' Bruschettas', ' Chocolate Bomb', 'Friendly Staff', 'Biryani', 'Paratha', ' Vada Pav', ' Chaat', ' Pav Bhaji', ' Panipuri', ' Masala Poori', ' Dhokla', 'Whole Meat Shawarma', ' Fries', 'Panneer Butter Masala', ' Lassi', ' Paratha', ' Jeera Rice', ' Paneer Parantha', ' Bhindi Fry', ' Veg Kolhapuri', 'Waffles', ' Pasta', ' Crispy Chicken', ' Honey Chilli Chicken', ' Sandwich', ' Coffee', ' Crepe', 'Masala Papad', ' Paneer Satay', ' Vegetable Biryani', ' Panneer Butter Masala', ' Tomato Soup', ' Manchow Soup', ' Roti', 'Shawarma', ' Biryani', ' Bbq Chicken', ' Chicken Grill', ' Raita', 'Jowar Roti', ' Buttermilk', ' Sabudana Vada', ' Thali', ' Mirchi Bajji', ' Brinjal Curry', 'Burgers', ' Caramel Milkshake', ' Iced Coffee', ' Iced Tea', 'Supreme Pizza', ' Potato Wedges', ' Masala Lemonade', ' Tandoori Paneer', ' Exotic Garlic Bread', 'Neer Dosa', ' Sea Food', ' Ghee Rice', ' Marwai Sukka', ' Egg Biryani', ' Kori Roti', ' Thai Fish', 'White Pasta', ' Noodles', ' Babycorn Manchurian', ' Pizza', ' Veg Momos', ' Fry Momos', ' Tawa Pulav', 'Masala Dosa', ' Sandwiches', ' Coffee', ' Onion Dosa', ' Rava Dosa', ' Chaat', ' Sandwich', 'French Fries', ' Burgers', 'Paratha', ' Boondi Raita', ' Roti', ' Kulfi', ' Panneer Butter Masala', ' Sweet Lassi', ' Dal Tadka', 'Veg Momos', ' Brownie Chocolate', ' Palak Chaat', ' Mutton Biryani', ' Dahipuri', ' Fish', ' Sea Food', 'Noodles', ' Pasta', ' Schezwan Momos', ' Chicken Momo', ' Teriyaki Chicken', ' Thukpa', ' Fries', 'Spring Roll', ' Manchow Soup', ' Babycorn Manchurian', ' Mint Cooler', ' Roti', ' Panneer Butter Masala', 'Biryani', 'Pho', ' Papaya Salad', ' Coffee', ' Veg Momos', ' Rolls', 'Waffles', ' Pasta', ' Coleslaw Sandwich', ' Choco Waffle', ' Tacos', ' Momos', ' Cheese Nachos', 'Pizza', ' Pasta', ' Salads', ' Calamari', ' Ravioli', ' Pumpkin Soup', ' Wine', 'Nachos', ' Cup Cake', ' Hot Chocolate Fudge', ' Salsa', ' Cheesy Garlic Bread', ' Waffles', ' Tiramisu', 'Lime Soda', ' Manchow Soup', ' Biryani', ' Lemon Chicken', ' Tomato Soup', ' Butter Naan', ' Chilli Chicken', 'Murgh Ghee Roast', ' Fish Fry', ' Ghee Roast Paneer', ' Neer Dosa', ' Appam', ' Sea Food', ' Chicken Sukha', 'Ragi Mudde', ' Lemon Chicken', ' Chilli Chicken', ' Chicken Curry', ' Mutton Chops', ' Mutton Biryani', 'Pasta', ' Potato Fries', ' Potato Wedges', ' Hot Chocolate', ' Chaat', ' Filter Coffee', ' Americano', 'Sweet Lassi', ' Gulab Jamun', ' Palak Paneer Paratha', ' Paneer Parantha', 'Pizza', ' White Sauce Pasta', ' Masala Garlic Bread', 'Burgers', ' Fries', ' Mojito', ' Chicken Burger', 'Paratha', ' Sandwich', ' Chilli Idli', ' Noodles', ' Sandwiches', ' Lime Soda', 'Chicken Biryani', ' Mutton Biryani', ' Egg Biryani', ' Chicken Fry', ' Mutton Fry', 'Chaat', ' Brownie', ' Onion Rings', ' Tomato Soup', ' Paneer Manchurian', ' Thali', ' Faluda', 'Sandwiches', ' Chocolate Sandwich', ' Chaat', ' Veg Cheese Sandwich', ' Chilli Cheese', ' Corn Sandwich', 'Prawn Biryani', ' Andhra Meal', ' Veg Thali', ' Fry Piece Biryani', ' Gongura Chicken', ' Paratha', 'Veggie Delight', ' Paneer Tikka', ' Salad', 'Faluda', ' Brownie', ' Sandwich', ' Kulfi', ' Fruit Salad', ' Sweet Lassi', ' Chocolate Fudge', 'Pav Bhaji', ' Masala Dosa', ' Idli Vada', ' Filter Coffee', ' Pongal', ' Curd Rice', ' Kesari Bath', 'Roti', ' Masala Dosa', 'Onion Rings', ' Pasta', ' Kadhai Paneer', ' Salads', ' Salad', ' Roti', ' Jeera Rice', 'Masala Papad', ' Paneer Satay', ' Paneer Hyderabadi', ' Butter Naan', ' Roti', 'Stuffed Mushroom', ' Chicken Biryani', ' Jal-jeera', 'Filter Coffee', ' Tea', ' Hot Coffee', ' Hatti Samosa', 'Masala Dosa', 'Badam Milk', ' Potato Twister', ' Pav Bhaji', ' Panipuri', ' Tikki Chaat', ' Cheese Cutlet', 'Pasta', ' Iced Tea', 'Roti', ' Butter Kulcha', ' Fried Rice', ' Tomato Soup', ' Curd Rice', ' Lunch Buffet', ' Paneer Tikka', 'Chicken Biryani', 'Chicken Biryani', ' Ragi Mudde', ' Mutton Chops', 'Masala Dosa', ' Coffee', 'Cocktails', ' Paneer Tikka', ' Rasmalai', ' Rabri', ' Masala Papad', ' Vegetable Biryani', ' Chicken Curry', 'Garlic Bread', ' Burgers', ' Sandwiches', ' Pizza', ' Hot Chocolate Fudge', ' Waffles', ' Potato Wedges', 'Veggie Pizza', ' Garlic Bread', ' Lava Cake', ' Chocolava', ' Spicy Paneer', 'Kharabath', ' Crispy Vada', ' Filter Coffee', ' Idli Vada', ' Idli Chutney', ' Kesari Bath', ' Gulab Jamun', 'Noodles', ' Veg Momos', ' Chowmein', ' Schezwan Fried Rice', ' Clear Soup', ' Sweet Corn Soup', ' Chicken Wings', 'Veg Thali', ' Matka Kulfi', ' Pongal', ' Buttermilk', ' Vada', ' Ragi Mudde', ' Curd Rice', 'Coffee', ' Sandwich', ' Hot Chocolate', ' Ice Tea', ' Sandwiches', ' Chicken Lasagne', ' Pizza', 'Jowar Roti', ' Buttermilk', ' Jolada Rotti', ' Brinjal Curry', ' Coffee', ' Pongal', ' Vegetable Biryani', 'Pizza', ' Garlic Bread', 'Sweet Pongal', ' Mini Idlis', ' Filter Coffee', ' Open Masala Dosa', ' Pakoda', ' Vada', ' Raagi Dosa', 'Paratha', ' Coffee', ' Halwa', ' Lassi', 'Vegetable Biryani', ' Pineapple Rasam', ' Cheese Garlic Naan', ' Potato Cheese Ball', ' Bamboo Chicken Biryani', ' Chicken Skewer', ' Fish', 'Malai Kofta', ' Sandwich', ' Naan', 'Momos', ' Lunch Buffet', ' Chocolate Nirvana', ' Thai Green Curry', ' Paneer Tikka', ' Dum Biryani', ' Chicken Biryani', 'Coffee', ' Masala Dosa', 'Sea Food', ' Neer Dosa', ' Anjal Masala Fry', ' Coconut Pudding', ' Chicken Biryani', ' Roti', ' Fish Curry', 'Tandoori Chicken', ' Rolls', 'Pasta', ' Nachos', ' Jalapeno Poppers', ' Burrito Bowl', ' Ferrero Rocher Shake', ' Falafal Sandwich', ' Banana Waffles', 'Pizza', ' Mocktails', ' Coffee', ' Nachos', ' Salad', ' Pasta', ' Sandwiches', 'Aloo Paratha', 'Rava Dosa', 'Momos', ' Mushroom Bruschettas', ' Jalapeno Cheeseballs', ' Pasta', ' Red Velvet Shake', ' Red Velvet Milkshake', ' Maggi', 'Pesto Pizza', 'Sandwiches', ' Noodles', ' Sandwich', 'Lasagne', ' Steak', ' Masala Quesadilla', ' Garlic Bread', 'Breakfast Buffet', ' Kulcha', ' Almond Soup', ' Lunch Buffet', ' Roti', ' Mocktails', ' Vegetarian', 'Hara Bhara Kabab', ' Crispy Corn', ' Patiala Paneer', ' Panneer Butter Masala', ' Dum Biryani', ' Jal-jeera', ' Butter Naan', 'Waffles', ' Cup Cake', ' Chocolate Cake', 'Waffles', 'Momos', ' Burgers', ' Rolls', ' Sandwich', ' Hot Chocolate', ' Tea', ' Naga Thali', 'Freak Shake', ' Fries', ' Pasta Alfredo', 'Butter Chicken', ' Chicken Grill', ' Tandoori Chicken', ' Noodles', ' Biryani', ' Fried Rice Chicken', 'Pizza', ' Pasta', ' Nachos', ' Burgers', ' Sandwich', ' Tea', ' Cheese Balls', 'Masala Chai', ' Coffee', ' Masala Tea', ' Maggi', 'Pasta', ' Gelato', ' Garlic Bread', ' Mojito', ' Nachos', ' Paneer Pizza', ' Mocktails', 'Salad', ' Mocktails', ' Chocolava', ' Pesto Pasta', ' Momos', ' Jasmine Rice', ' Tom Yum Soup', 'Vada', ' Masala Dosa', ' Idli', 'Coffee', ' Masala Dosa', ' Vegetable Biryani', 'Pasta', ' Pizza', ' Burgers', ' Pancakes', ' Garlic Bread', 'Maggi', ' Pasta', ' Burgers', ' Sandwiches', ' Fries', 'Sea Food', ' Neer Dosa', 'Pizza', 'Brownie', ' Mojito', ' Prawn Ghee Roast', ' Corn Tikki', ' Mushroom Ghee Roast', ' Pinacolada', ' Biryani', 'Biryani', ' Thick Shakes', ' Fries', 'Vegetarian', 'Sweet Pongal', ' Curd Rice', 'Pesto Pizza', ' Buffalo Wings', ' Cocktails', ' Beer', ' Egg Ghee Roast', ' Brownie', ' Pepper Chicken', 'Waffles', ' Sandwiches', ' Maggi', ' Thick Shakes', ' Chocolate Sandwich', 'Honey Chilli Potatoes', ' Nachos', ' Lemonade', ' Hakka Noodles', ' Babycorn Manchurian', ' Mushroom Manchurian', ' Spring Roll', 'Manchow Soup', ' Momos', ' Wonton Soup', ' Chowmein', ' American Chopsuey', ' Chop Suey', ' Manchurian', 'Pasta', ' Pizza', ' Nachos', ' Gelato', ' Ravioli', ' Jalapeno Poppers', ' Cocktails', 'Masala Vada Pav', ' Cheese Vada Pav', ' Fries', 'Corn Augratin', ' Lasagne', ' Fries', ' Garlic Bread', ' Pili Pili Pizza', ' Mexican Rice', ' Ravioli', 'Salads', ' Sandwiches', ' Pasta', ' Mocktails', ' Corn Sandwich', ' Garden Salad', ' Miso Salad', 'Coffee', ' Thali', ' Kharabath', ' Masala Dosa', ' Idli Vada', ' Vanilla Ice Cream', ' Bisi Bele Bhaath', 'Veg Jaipuri', ' Neer Dosa', ' Kulcha', ' Pepper Chicken', ' Chilli Mushroom', ' Murgh Platter', ' Paneer Tikka', 'Pizza', ' Pasta Alfredo', ' Brownie', ' Garlic Bread', ' Sandwiches', ' Lemon Mojito', ' Mocha Fudge', 'Mocktails', ' Tomato Soup', ' Pasta', ...]
Displaying the indices of dataset
df.index=range(df.shape[0])
display(df.index)
RangeIndex(start=0, stop=23248, step=1)
Finding out the most liked dishes and getting it value counts and displaying the top most 30 liked dishes
print("Count of Most liked dishes")
favourite_food = pd.Series(likes).value_counts()
display(favourite_food.head(30))
Count of Most liked dishes
Pasta 2692 Pizza 1915 Cocktails 1880 Burgers 1736 Mocktails 1623 Biryani 1307 Sandwiches 1287 Burgers 1256 Coffee 1184 Nachos 1170 Fish 1116 Paratha 1107 Salads 1055 Chicken Biryani 1004 Cocktails 891 Fries 876 Noodles 854 Beer 835 Mutton Biryani 832 Tea 819 Coffee 801 Sandwich 788 Butter Chicken 782 Thali 770 Biryani 749 Pizza 747 Roti 729 Brownie 726 Salad 677 Hot Chocolate 672 dtype: int64
plotting the bar graph with the Most liked foods and number of likes it got and saving the PNG file
ax = favourite_food.nlargest(n=20, keep='first').plot(kind='bar',figsize=(20,15),title = 'Top 30 Favourite Food counts ')
for i in ax.patches:
ax.annotate(str(i.get_height()), (i.get_x() * 1.005, i.get_height() * 1.005))
plt.savefig('Top 30 Favourite Food counts.png')
Displaying the value counts of Restaurants type
print (df['rest_type'].value_counts().head(50))
Casual Dining 7326 Quick Bites 5250 Cafe 2375 Dessert Parlor 1083 Casual Dining, Bar 985 Delivery 704 Bar 358 Takeaway, Delivery 342 Bar, Casual Dining 336 Pub 324 Fine Dining 311 Casual Dining, Cafe 304 Lounge 298 Pub, Casual Dining 224 Food Court 220 Bakery 166 Cafe, Casual Dining 159 Dessert Parlor, Cafe 143 Beverage Shop, Quick Bites 137 Beverage Shop 137 Cafe, Bakery 128 Casual Dining, Pub 124 Microbrewery, Casual Dining 121 Cafe, Dessert Parlor 111 Sweet Shop 105 Bakery, Quick Bites 96 Sweet Shop, Quick Bites 88 Pub, Microbrewery 76 Dessert Parlor, Quick Bites 71 Bakery, Cafe 69 Bakery, Dessert Parlor 62 Quick Bites, Dessert Parlor 55 Mess 49 Dessert Parlor, Beverage Shop 49 Casual Dining, Microbrewery 47 Kiosk 44 Beverage Shop, Dessert Parlor 44 Microbrewery, Pub 42 Quick Bites, Sweet Shop 41 Cafe, Quick Bites 40 Fine Dining, Bar 40 Pub, Cafe 40 Lounge, Casual Dining 37 Club 37 Lounge, Bar 36 Quick Bites, Bakery 35 Food Court, Quick Bites 34 Quick Bites, Beverage Shop 31 Pub, Bar 28 Microbrewery 28 Name: rest_type, dtype: int64
Plotting the bar graph for the Restaurant Types and saving the PNG file
plt.figure(figsize=(15,12))
rest = df['rest_type'].value_counts()[:20]
sns.barplot(x=rest,y=rest.index)
plt.title('Restaurant Types')
plt.savefig('Restaurants Types.png')
plt.xlabel('count')
plt.show()
Label Encoding the online order column from the dataset for fitting the alggorithm
df['online_order']=df['online_order'].astype('category')
df['online_order']=df['online_order'].cat.codes
print(df.online_order.value_counts())
1 16378 0 6870 Name: online_order, dtype: int64
Label Encoding the Book table column from the dataset for fitting the alggorithm
df['book_table']=df['book_table'].astype('category')
df['book_table']=df['book_table'].cat.codes
df.book_table=pd.to_numeric(df.book_table)
df.book_table.value_counts()
0 17191 1 6057 Name: book_table, dtype: int64
Label Encoding all the required columns from the dataset for fitting the alggorithm
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df.location = le.fit_transform(df.location)
df.rest_type=le.fit_transform(df.rest_type)
df.cuisines=le.fit_transform(df.cuisines)
df.menu_item=le.fit_transform(df.menu_item)
Getting all the required columns to the new dataframe and exporting it as a comma seperated file(csv).
my_data = df.iloc[:,[2,3,4,5,6,7,9,10,12]]
my_data.to_csv('cleaned_data_zomato.csv',index=False)
Assigning the dependent and independent variables
x = df.iloc[:,[2,3,5,6,7,9,10,12]]
y = df['rate']
Splitting the dependent and independent variable into training and test dataset using train test split.
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.3,random_state=10)
Fitting the Linear Regression Model with the train dependent and train independent variable and getting the ,r2 Score between the predicted values and dependent test dataset
lr = LinearRegression()
lr.fit(x_train,y_train)
y_pred=lr.predict(x_test)
from sklearn.metrics import r2_score
print(r2_score(y_test,y_pred))
0.2281882852296726
Fitting the Random Forest Regressor Model with the train dependent and train independent variable and getting the r2 Score between the predicted values and dependent test dataset
from sklearn.ensemble import RandomForestRegressor
rf = RandomForestRegressor(n_estimators=650,random_state=245,min_samples_leaf=.0001)
rf.fit(x_train,y_train)
y_pred = rf.predict(x_test)
print(r2_score(y_test,y_pred))
0.8809706960047533
Fitting the Extra Trees Regressor Model with the train dependent and train independent variable and getting the r2 Score between the predicted values and dependent test dataset
ET = ExtraTreesRegressor(n_estimators=120)
ET.fit(x_train,y_train)
y_pred=ET.predict(x_test)
print(r2_score(y_test,y_pred))
0.932504537384086
Passing some of list parameter for the Random Forest Regressor to run with RandomizedSearchCV
n_estimators = [100,200,300,400,500,600,700,800]
max_features = ['auto','sqrt']
max_depth = [int(x) for x in np.linspace(5,30,num=6)]
min_samples_split=[2,5,10,15,100]
min_samples_leaf = [1,2,5,10]
min_weight_fraction_leaf = [0.0,0.1,0.2,0.3,0.4,0.5]
Fitting the Random Forest Regressor model with the above mentioned parameter in the RandomizedSearchCV Algorithm
random_grid = {'n_estimators':n_estimators,
'max_features':max_features,
'max_depth':max_depth,
'min_samples_split':min_samples_split,
'min_samples_leaf':min_samples_leaf,
'min_weight_fraction_leaf':min_weight_fraction_leaf}
rf = RandomForestRegressor()
rf = RandomizedSearchCV(estimator=rf,param_distributions=random_grid,
scoring='r2',n_iter=100,cv=10,verbose=0,random_state=42,n_jobs=1)
rf.fit(x_train,y_train)
RandomizedSearchCV(cv=10, estimator=RandomForestRegressor(), n_iter=100,
n_jobs=1,
param_distributions={'max_depth': [5, 10, 15, 20, 25, 30],
'max_features': ['auto', 'sqrt'],
'min_samples_leaf': [1, 2, 5, 10],
'min_samples_split': [2, 5, 10, 15,
100],
'min_weight_fraction_leaf': [0.0, 0.1,
0.2, 0.3,
0.4, 0.5],
'n_estimators': [100, 200, 300, 400,
500, 600, 700, 800]},
random_state=42, scoring='r2')
Getting the Best Parameters of the algorithm,r2 score between the predicted values and dependent test dataset
print(rf.best_params_)
y_pred = rf.predict(x_test)
display('\n',r2_score(y_test,y_pred))
{'n_estimators': 700, 'min_weight_fraction_leaf': 0.0, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_features': 'auto', 'max_depth': 25}
'\n'
0.8969384866998245
create the pickle file and Loading the Pickle file with the algorithm which gives highest accuracy score and predicting the dependent variable with the r2 Score and best parameters
import pickle
pickle.dump(ET,open('ET Pickle File.pkl','wb'))
model = pickle.load(open('ET Pickle File.pkl','rb'))
pred1 = model.predict(x_test)
print (r2_score(pred1,y_test))
0.9262784174224225